/**
 * 
 */
package edu.berkeley.nlp.discPCFG;

import edu.berkeley.nlp.util.Counter;

/**
 * EncodedDatums are sparse representations of (labeled) feature count vectors for a given data point.  Use
 * getNumActiveFeatures() to see how many features have non-zero count in a datum.  Then, use getFeatureIndex() and
 * getFeatureCount() to retrieve the index and count of each non-zero feature.  Use getLabelIndex() to get the
 * label's index.
 */
public class EncodedDatum {

  /** Index of this datum's label, as produced by the encoding's getLabelIndex(). */
  int labelIndex;

  /** Encoded index of each stored feature; parallel to {@link #featureCounts}. */
  int[] featureIndexes;

  /** Count of each stored feature; parallel to {@link #featureIndexes}. */
  double[] featureCounts;

  /** The probability of each substate of the label (allows partial labeling). */
  double[] weights;

  /**
   * Creates a datum directly from already-encoded values.  The arrays are stored by reference, not copied.
   *
   * @param labelIndex     index of the label
   * @param featureIndexes encoded feature indexes, one entry per stored feature
   * @param featureCounts  feature counts, parallel to featureIndexes
   * @param weights        per-substate weights of the label; may be null
   */
  public EncodedDatum(int labelIndex, int[] featureIndexes, double[] featureCounts, double[] weights) {
    this.weights = weights;
    this.featureCounts = featureCounts;
    this.featureIndexes = featureIndexes;
    this.labelIndex = labelIndex;
  }

  /**
   * Encodes a datum without label information.  Delegates to
   * {@link #encodeLabeledDatum(Encoding, Counter, Object, double[])} with a null label and null weights; the
   * resulting label index is whatever the encoding returns for a null label.
   *
   * @param encoding the encoding used to look up feature indexes
   * @param features the feature counts of the data point
   * @return the encoded datum
   */
  public static <F,L> EncodedDatum encodeDatum(Encoding<F, L> encoding, Counter<F> features) {
    final L noLabel = null;
    final double[] noWeights = null;
    return encodeLabeledDatum(encoding, features, noLabel, noWeights);
  }

  /**
   * Encodes a labeled datum.  Features for which the encoding reports a negative index are dropped; every
   * remaining feature is stored as an (index, count) pair.
   *
   * @param encoding the encoding used to look up feature and label indexes
   * @param features the feature counts of the data point
   * @param label    the label of the data point
   * @param weights  per-substate weights of the label; stored by reference, not copied
   * @return the encoded datum
   */
  public static <F,L> EncodedDatum encodeLabeledDatum(Encoding<F, L> encoding, Counter<F> features, L label, double[] weights) {
    // First pass: keep only the features the encoding knows about.
    Counter<F> retained = new Counter<F>();
    for (F candidate : features.keySet()) {
      if (encoding.getFeatureIndex(candidate) >= 0) {
        retained.incrementCount(candidate, features.getCount(candidate));
      }
    }

    // Second pass: flatten the retained features into two parallel arrays.
    final int activeCount = retained.keySet().size();
    final int[] indexes = new int[activeCount];
    final double[] counts = new double[activeCount];
    int slot = 0;
    for (F kept : retained.keySet()) {
      indexes[slot] = encoding.getFeatureIndex(kept);
      counts[slot] = retained.getCount(kept);
      slot++;
    }

    return new EncodedDatum(encoding.getLabelIndex(label), indexes, counts, weights);
  }

  /** @return the index of this datum's label */
  public int getLabelIndex() {
    return this.labelIndex;
  }

  /** @return the per-substate label weights (the stored array itself, possibly null) */
  public double[] getWeights() {
    return this.weights;
  }

  /** @return the number of features stored in this datum */
  public int getNumActiveFeatures() {
    return this.featureCounts.length;
  }

  /**
   * @param num position of the stored feature, from 0 to getNumActiveFeatures() - 1
   * @return the encoded index of the feature at that position
   */
  public int getFeatureIndex(int num) {
    return this.featureIndexes[num];
  }

  /**
   * @param num position of the stored feature, from 0 to getNumActiveFeatures() - 1
   * @return the count of the feature at that position
   */
  public double getFeatureCount(int num) {
    return this.featureCounts[num];
  }
}